knitr::opts_chunk$set(
warning=FALSE,
message=FALSE,
results = 'asis',
error = FALSE,
tidy = FALSE,
fig.show = "hold")LPS-MIA base analysis
library(DESeq2)
library(xlsx)
library(Hmisc)
library(dplyr)
library(viridis)
library(pheatmap)
library(variancePartition)
library(RColorBrewer)
library(pcaExplorer)
library(vsn)
library(EnhancedVolcano)
library(rapport)
library(factoextra)
library(broom)
options(check.names = FALSE)PCA.covar <- function(df, meta, heatmap.r=F, data.r=F, heatmap.p=T,type="spearman"){
l <- length(colnames(meta))
l2 <- l+1
pca.cov <- prcomp(t(df))
PC.covs <- pca.cov$x[,1:l]
PC_cov_correlation <- rcorr(as.matrix(PC.covs), as.matrix(meta), type=type)
PC_variance.explained <- PC_cov_correlation$r[1:l,l2:length(rownames(PC_cov_correlation$r))]
PC_cov.cor.p <- PC_cov_correlation$P[1:l,l2:length(rownames(PC_cov_correlation$r))]
if (heatmap.r)
pheatmap(PC_variance.explained, cluster_rows = F, cluster_cols = F, show_colnames = T,
color = heatmap.color.code)
if (heatmap.p)
pheatmap(PC_cov.cor.p, cluster_rows = F, cluster_cols = F, show_colnames = T,
color = colorRampPalette(c("red", "white"))(50),
display_numbers = T)
if (data.r)
return (PC.covs)
}
createDT <- function(DF, caption="", scrollY=500){
data <- DT::datatable(DF, caption=caption,
extensions = 'Buttons',
options = list( dom = 'Bfrtip',
buttons = c('copy', 'csv', 'excel', 'pdf', 'print'),
scrollY = scrollY, scrollX=T, scrollCollapse = T, paging = F,
columnDefs = list(list(className = 'dt-center', targets = "_all"))
)
)
return(data)
}#Subsetting a df to samples of interest (accute)
subset.df <- gfilt.df[colnames(gfilt.df) %in% rownames(filt.meta[filt.meta$`Cell line` %in% c("OH2.6") & filt.meta$`Accute/chronic` %in% c("Accute"),])]
subset.df <- subset.df[rowSums(subset.df) >= length(colnames(subset.df)),] #Also removes ERCC SIs
subset.meta <- filt.meta_clean[rownames(filt.meta_clean) %in% colnames(subset.df),]
#Sort columns
subset.meta <- subset.meta[sort(rownames(subset.meta)),]
subset.df <- subset.df[sort(colnames(subset.df))]
all(rownames(subset.meta) == colnames(subset.df))[1] TRUE
#Quick density QC
#550 x 450
plot.density(subset.meta,to.color="Stimulation",to.plot="`Library size`")#Quick covariate analysis
cov_for.cor <- cov_for.cor[sort(rownames(cov_for.cor)),]
cov_for.cor.sub <- cov_for.cor[rownames(cov_for.cor) %in% colnames(subset.df),]
cov_for.cor.sub <- cov_for.cor.sub[!colnames(cov_for.cor.sub) %in% c("Accute/chronic", "Cell line", "Batch", "Experiment")]
covar.cor.sub <- rcorr(as.matrix(cov_for.cor.sub), type="pearson")
covar.cor.sub_1 <- covar.cor.sub$r
#500x450
pheatmap(covar.cor.sub_1, color = heatmap.color.code)#D100 PCA-covariate analysis
#500x500
D100.PCAcovar <- PCA.covar(subset.df,cov_for.cor.sub,data.r=T,type="pearson")
PCA_cov_cor_R(cov_for.cor.sub, subset.df)#For correcting the data
subset.meta$'Log(library size)' <- scale(subset.meta$`Library size`)
subset.meta$`Scaled percent. MT` <- scale(subset.meta$"Percent. MT")
#Create a DESeq2 matrix
dds.complex <- DESeqDataSetFromMatrix(countData = as.matrix(subset.df),
colData = data.frame(subset.meta),
design = ~ Log.library.size. + Percent..ERCC + Stimulation)
#version 2 based on dds object
# Estimate library size correction scaling factors
dds.complex <- estimateSizeFactors(dds.complex)
vsd.complex <- vst(dds.complex)#Doing QC on the vst-transformed values
meanSdPlot(assay(vsd.complex))
sampleDists <- dist(t(assay(vsd.complex)))
sampleDistMatrix <- as.matrix(sampleDists)
rownames(sampleDistMatrix) <- colnames(vsd.complex)
colnames(sampleDistMatrix) <- NULL
colors <- colorRampPalette( rev(brewer.pal(9, "Blues")) )(255)
annot_df <- data.frame(colData(dds.complex)$Stimulation)
colnames(annot_df) <- c("Stimulation")
rownames(annot_df) <- colnames(dds.complex)
annot_colors <- vector('list', length=1)
annot_colors$Stimulation[["CTR"]] <- "#00AFBB"
annot_colors$Stimulation[["LPS"]] <- "#E7B800"#Heatmap intersample-distance
pheatmap(sampleDistMatrix,
clustering_distance_rows=sampleDists,
clustering_distance_cols=sampleDists,
col=colors,annotation_row = annot_df, annotation_colors = annot_colors)#IQR of samples
norm.df <- assay(vsd.complex)
sOutliers.norm <- IQRoutlier.detector(norm.df,barplot=T)#PCA
#550x450
pca.norm <- plotPCA.custom(as.matrix(norm.df), intgroup=c("Cell line", "Stimulation","Experiment"),
ntop = 50000, returnData=TRUE, metadata=subset.meta)
PoV.norm <- round(100 * attr(pca.norm, "percentVar"))
PCAplot(pca.norm, Condition="Stimulation", PoV.df=PoV.norm)#Remove batch effects
#Create a meta df that corresponds to the sorted columns of the vsd assay but has covariates as numeric factors
covs_forremBatch <- subset.meta
covs_forremBatch$Stimulation <- as.factor(as.numeric(covs_forremBatch$Stimulation))
covs_forremBatch$Experiment <- as.factor(as.numeric(covs_forremBatch$Experiment))
covs_forremBatch$`Cell line` <- as.factor(as.numeric(covs_forremBatch$`Cell line`))
covs_forremBatch$`Batch` <- as.factor(as.numeric(covs_forremBatch$`Batch`))
covs_forremBatch$Replicate <- as.factor(as.numeric(as.factor(covs_forremBatch$Replicate)))
covs_forremBatch <- covs_forremBatch[!colnames(covs_forremBatch) %in% c("Duplicate","Accute/chronic")]
batch.rem <- removeBatchEffect(as.matrix(assay(vsd.complex)),
covariates=as.matrix(cbind(covs_forremBatch$`Log(library size)`,
covs_forremBatch$`Percent. ERCC`
)),
design=model.matrix(~ covs_forremBatch$Stimulation))#====PC loadings for covariates====#
#500x500
covs_forremBatch <- covs_forremBatch[colnames(covs_forremBatch) %in% colnames(cov_for.cor.sub)]
D100.PCAcovar.postcorrection <- PCA.covar(batch.rem,covs_forremBatch,data.r=T,type="pearson",heatmap.r=T)
#PCA (550 x 450)
pca.cor <- plotPCA.custom(as.matrix(batch.rem), intgroup=c("Stimulation","Cell line","Experiment"),
ntop = 50000, returnData=TRUE, metadata=subset.meta, pc.1=1,pc.2=2)
PoV.cor <- round(100 * attr(pca.cor, "percentVar"))#Inter sample distances post correction
sampleDists.postcor <- dist(t(batch.rem))
sampleDistMatrix.postcor <- as.matrix(sampleDists.postcor)
rownames(sampleDistMatrix.postcor) <- colnames(vsd.complex)
colnames(sampleDistMatrix.postcor) <- NULL
#Heatmap intersample-distance
#650x500
pheatmap(sampleDistMatrix.postcor, show_rownames = F,
clustering_distance_rows=sampleDists.postcor,
clustering_distance_cols=sampleDists.postcor,
col=colors, annotation_row = annot_df, annotation_colors = annot_colors)dds.complex <- DESeq(dds.complex)
res.complex <- results(dds.complex, name="Stimulation_LPS_vs_CTR")
summary(res.complex)out of 17498 with nonzero total read count adjusted p-value < 0.1 LFC > 0 (up) : 9, 0.051% LFC < 0 (down) : 4, 0.023% outliers [1] : 0, 0% low counts [2] : 3732, 21% (mean count < 10) [1] see ‘cooksCutoff’ argument of ?results [2] see ‘independentFiltering’ argument of ?results
(5.1) Results
#Volcano plot
#700x750
EnhancedVolcano(res.complex,
lab = rownames(res.complex),
x = 'log2FoldChange',
y = 'padj',
pCutoff = 0.05,
FCcutoff = 2,
labSize = 6,
legendPosition = 'right')#Grab significant downreg & highest lfc genes
if (length(res.complex[is.na(res.complex$padj),]$padj) != 0)
res.complex[is.na(res.complex$padj),]$padj <- 1
downreg.genes <- rownames(res.complex[res.complex$log2FoldChange < -2 & res.complex$padj < 0.05,])
upreg.genes <- rownames(res.complex[res.complex$log2FoldChange > 2 & res.complex$padj < 0.05,])
#Plot the genes
#heatmap based on sorted samples for Stimulation:
fact.to.sort <- as.numeric(annot_df$Stimulation)
sorted <- batch.rem[,order(fact.to.sort, decreasing=F)]
sub <- sorted[rownames(sorted) %in% c(upreg.genes,downreg.genes),]
#550x1300
pheatmap(sub,
cluster_rows = T,
cluster_cols = F, annotation_legend = T, show_colnames = F,
annotation_col=annot_df[-2],
color = heatmap.color.code, scale='row', annotation_colors = annot_colors)(6.1) LPS response and microglia genes
LPS.genes <- sort(c("IFT1", "IFT2", "IFT3", "CXCL8", "CX3CR1", "IL10", "IL1B", "TNF",
"IL6", "TMEM119", "AIF1", "TLR4", "IL12A", "IL23A", "CD40", "P2RY12", "GFAP"))
LPS.sub <- sorted[rownames(sorted) %in% c(LPS.genes),]
pheatmap(LPS.sub,
cluster_rows = T,
cluster_cols = F, annotation_legend = T, show_colnames = F,
annotation_col=annot_df[-2],
color = heatmap.color.code, scale='row', annotation_colors = annot_colors)#Boxplots of DEGs
rownames(sub) <- gsub('\\.', '_', rownames(sub))
rownames(sub) <- gsub('\\-', '_', rownames(sub))
box <- data.frame(scale(t(sub)), Stimulation=annot_df[order(fact.to.sort, decreasing=F),])
#box.chr <- data.frame(t(sub.ac), Stimulation=sub.meta.ac$Stimulation)
box$Stimulation <- factor(box$Stimulation, levels=c("CTR","LPS"))
#Boxplots
library(ggplot2)
all.p <- NULL
for (i in rownames(sub)){
n <- as.factor(i)
p =
ggplot(box, aes_string(x="Stimulation", y=i, fill="Stimulation")) +
geom_boxplot() +
scale_fill_manual(values = c("#00AFBB", "#E7B800", "#FC4E07"))+
theme(
legend.position="Type",
plot.title = element_text(size=11)
) +
theme_bw()+
theme(plot.title = element_text(hjust=0.5))+
ylab(paste(i,"(residual expression)"))+
theme(axis.text.x=element_blank(), axis.title.x=element_blank())
all.p[[i]] <- p}
library(ggpubr)
#700x550
all.genes <- ggarrange(plotlist=all.p[1:length(all.p)], common.legend = T)
all.genes